# Load the master gene table and derive the working gene set: rows whose
# `chr` is not 'chrM' and whose `overlap` column is FALSE.
all_genes <- read.csv("~/data/feature_file/gene_master.csv")
# FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
yeast_gene <- subset(all_genes, chr != 'chrM' & all_genes$overlap == FALSE)
# Row count of the filtered table; used as the bound of the annotation loop.
n <- nrow(yeast_gene)


# Annotate same-strand, same-chromosome neighbours in the gene table.
#
# For a '+' row i whose previous row is also '+' on the same chr:
#   tandem_A[i] and tandem_B[i-1] are set, and
#   promoter_width[i] = tss[i] - end[i-1]      (only when tss[i] is known).
# For any other row i whose next row is '-' on the same chr:
#   tandem_A[i] and tandem_B[i+1] are set, and
#   promoter_width[i] = start[i+1] - tss[i]    (only when tss[i] is known).
#
# NOTE(review): the loop bound n is nrow(yeast_gene), but the rows indexed
# here belong to all_genes (the unfiltered table) - confirm that this is
# intended and that the table is sorted by chromosome and position.
all_genes$tandem_A <- FALSE
all_genes$tandem_B <- FALSE
all_genes$promoter_width <- NA

# Interior rows only (2 .. n-1). seq_len() yields an empty sequence when
# n < 3, whereas 2:(n-1) would count backwards and index row 0.
for (i in seq_len(max(n - 2, 0)) + 1) {
  if (all_genes$strand[i] == '+') {
    # Scalar conditions: use && rather than the elementwise &.
    if (all_genes$strand[i - 1] == '+' &&
        all_genes$chr[i - 1] == all_genes$chr[i]) {
      all_genes$tandem_A[i] <- TRUE
      all_genes$tandem_B[i - 1] <- TRUE
      if (!is.na(all_genes$tss[i])) {
        all_genes$promoter_width[i] <- all_genes$tss[i] - all_genes$end[i - 1]
      }
    }
  } else {
    if (all_genes$strand[i + 1] == '-' &&
        all_genes$chr[i + 1] == all_genes$chr[i]) {
      all_genes$tandem_A[i] <- TRUE
      all_genes$tandem_B[i + 1] <- TRUE
      if (!is.na(all_genes$tss[i])) {
        all_genes$promoter_width[i] <- all_genes$start[i + 1] - all_genes$tss[i]
      }
    }
  }
}

# Copy promoter_width from all_genes onto tandem_gene_A_sub, matching rows by
# gene name.
# match() returns the first matching row and NA when a gene is absent, so a
# missing gene yields an NA width. The per-row which() lookup it replaces
# failed on zero or multiple matches and on an empty table.
# NOTE(review): tandem_gene_A_sub is created further down in this script
# (the subset that filters on as_gene); this lookup has to run after that.
gene_row <- match(tandem_gene_A_sub$gene, all_genes$gene)
tandem_gene_A_sub$promoter_width <- all_genes$promoter_width[gene_row]
# Working-set genes flagged tandem_A that also have a positive tss.
tandem_gene_A <- subset(yeast_gene, tandem_A & tss > 0)
# Working-set genes flagged tandem_B.
tandem_gene_B <- subset(yeast_gene, tandem_B)

# Per-gene log2 ratio of the s60 columns (cac over wt), with a 0.001
# pseudocount on both sides to avoid division by zero and log2(0).
# NOTE(review): assumes wt_AS_rep1 and cac_AS_rep1 list genes in the same row
# order - confirm, since only wt_AS_rep1$gene is used for labelling.
as.df <- data.frame(
  gene = wt_AS_rep1$gene,
  log2_cac_wt = log2((cac_AS_rep1$s60 + 0.001) / (wt_AS_rep1$s60 + 0.001))
)
# Largest ratio first.
as.df <- as.df[order(as.df$log2_cac_wt, decreasing = TRUE), ]
# Keep the top 520 genes. head() returns fewer rows when the table is shorter,
# whereas as.df[1:520, ] would pad the result with all-NA rows.
as_gene <- head(as.df, 520)


# Total read counts per library, counted with samtools. Column F holds the
# counts for the *_F files and column R for the *_R files.
# The counts are hard-coded; the earlier read of read_count.csv was dropped
# because its result was overwritten by this table on the very next statement.
# NOTE(review): rows are presumably in the same order as F_list / R_list
# (wt_60, cac_60, wt_af, cac_af) - the normalisation indexes both by k.
read.count <- data.frame(
  'F' = c(13532569, 6469402, 14068365, 13985796),
  'R' = c(12754081, 6929160, 12731020, 14135796)
)

# BAM file stems, one per library.
F_list <- c("wt_60_F", "cac_60_F", "wt_af_F", "cac_af_F")
R_list <- c("wt_60_R", "cac_60_R", "wt_af_R", "cac_af_R")

# Zero-filled 2001-position profiles for the B/orf analysis, one per strain.
AS_v.list_B_orf <- list(wt = numeric(2001), cac = numeric(2001))
sense_v.list_B_orf <- list(wt = numeric(2001), cac = numeric(2001))

# tandem_A genes with a positive tss that are also listed in as_gene.
tandem_gene_A_sub <- subset(
  yeast_gene,
  tandem_A & tss > 0 & gene %in% as_gene$gene
)

# Zero-filled 2001-position profiles, one per library, filled by the BAM loop.
AS_v.list_A_sub_tss <- list(
  wt_60 = numeric(2001),
  cac_60 = numeric(2001),
  wt_af = numeric(2001),
  cac_af = numeric(2001)
)
sense_v.list_A_sub_tss <- list(
  wt_60 = numeric(2001),
  cac_60 = numeric(2001),
  wt_af = numeric(2001),
  cac_af = numeric(2001)
)


# Count read midpoints of one BAM file over a set of 1-bp windows.
#
# Each record returned by scanBam() is treated as an interval starting at
# `pos` with width `isize`, and is collapsed to its midpoint before counting.
#
# bam_file, bai_file: paths passed to scanBam() as `file` and `index`.
# param:              ScanBamParam restricting the scan to the region.
# window.gr:          GRanges of 1-bp windows to count over.
# Returns one count per window, in window order.
#
# NOTE(review): IRanges rejects negative widths, so this presumably relies on
# isize being positive for every record in these files - confirm.
count_read_midpoints <- function(bam_file, bai_file, param, window.gr) {
  reads.l <- scanBam(file = bam_file, index = bai_file, param = param)
  reads.gr <- GRanges(seqnames = reads.l[[1]]$rname,
                      ranges = IRanges(start = reads.l[[1]]$pos,
                                       width = reads.l[[1]]$isize))
  ranges(reads.gr) <- IRanges(start = mid(ranges(reads.gr)), width = 1)
  countOverlaps(window.gr, reads.gr)
}

# Accumulate sense and antisense profiles around the tss (+/- 1000 bp, i.e.
# 2001 positions) of every gene in tandem_gene_A_sub, for each library k.
# Counts are scaled to reads per million using read.count.
for (k in seq_along(F_list)) {
  # The BAM paths depend only on the library, so build them once per k.
  file_name.bam_A <- paste0("~/data/RNA/rep1/", F_list[k], ".bam")
  file_name.bam.bai_A <- paste0("~/data/RNA/rep1/", F_list[k], ".bam.bai")
  file_name.bam_B <- paste0("~/data/RNA/rep1/", R_list[k], ".bam")
  file_name.bam.bai_B <- paste0("~/data/RNA/rep1/", R_list[k], ".bam.bai")

  # seq_len() skips the loop when the gene table is empty; 1:nrow() would not.
  for (i in seq_len(nrow(tandem_gene_A_sub))) {
    if (i %% 200 == 0) {
      cat(paste('gene # ', i, '\n'))
    }
    chr <- tandem_gene_A_sub$chr[i]

    new_start <- tandem_gene_A_sub$tss[i] - 1000
    new_end <- tandem_gene_A_sub$tss[i] + 1000

    chr.gr <- GRanges(seqnames = chr,
                      ranges = IRanges(start = new_start, end = new_end))
    p <- ScanBamParam(what = c("rname", "strand", "pos", "isize"),
                      which = chr.gr)
    window_1.gr <- GRanges(seqnames = chr,
                           IRanges(start = seq(new_start, new_end, by = 1),
                                   width = 1))

    # Per-position signal from the F and R files, in reads per million.
    cpm_F <- count_read_midpoints(file_name.bam_A, file_name.bam.bai_A,
                                  p, window_1.gr) / read.count[k, 1] * 1000000
    cpm_R <- count_read_midpoints(file_name.bam_B, file_name.bam.bai_B,
                                  p, window_1.gr) / read.count[k, 2] * 1000000

    if (tandem_gene_A_sub$strand[i] == "+") {
      # '+' gene: the F file is accumulated as sense, the R file as antisense.
      sense_v.list_A_sub_tss[[k]] <- sense_v.list_A_sub_tss[[k]] + cpm_F
      AS_v.list_A_sub_tss[[k]] <- AS_v.list_A_sub_tss[[k]] + cpm_R
    } else {
      # Other genes: the roles swap (F = antisense, R = sense) and the profile
      # is reversed before being added.
      AS_v.list_A_sub_tss[[k]] <- AS_v.list_A_sub_tss[[k]] + rev(cpm_F)
      sense_v.list_A_sub_tss[[k]] <- sense_v.list_A_sub_tss[[k]] + rev(cpm_R)
    }
  }
}

# Apply a 10-point moving average and keep positions 100..1900 of the result.
.smooth_trim <- function(v) {
  stats::filter(v, rep(1, 10) / 10)[100:1900]
}

# x coordinates, smoothed and trimmed the same way as the signal.
x <- .smooth_trim(seq(-1000, 1000, 1))

# Smoothed profiles with a 0.001 pseudocount; list entry 1 is read as wt and
# entry 2 as cac.
# NOTE(review): these read sense_v.list_A_tss / AS_v.list_A_tss, while the
# BAM loop in this script fills sense_v.list_A_sub_tss / AS_v.list_A_sub_tss -
# confirm that the *_A_tss lists are defined elsewhere and are the intended input.
sense_wt_60 <- .smooth_trim(sense_v.list_A_tss[[1]] + 0.001)
as_wt_60 <- .smooth_trim(AS_v.list_A_tss[[1]] + 0.001)
sense_cac_60 <- .smooth_trim(sense_v.list_A_tss[[2]] + 0.001)
as_cac_60 <- .smooth_trim(AS_v.list_A_tss[[2]] + 0.001)


# Corners of the shaded band spanning x = -650 .. -233; the y extent of
# +/- 20 exceeds the plotted ylim, so the band fills the full plot height.
px <- c(-233, -650, -650, -233)
py <- c(20, 20, -20, -20)
# The polygon is drawn only after plot() below. A polygon() call made before
# any plot exists fails with "plot.new has not been called yet".

# Figure: log2(cac/WT) of the smoothed sense and antisense profiles.
png(file = '~/data/figure/as_at_promoter.png', width = 4, height = 3,
    units = "in", res = 300, bg = "white", type = "cairo-png")
par(mar = c(4, 4, 2, 1), cex = 0.8, cex.main = 0.7)
# Set up the axes with an invisible (white) line so the band can be drawn
# underneath the curves.
plot(x, log2(sense_cac_60 / sense_wt_60), type = 'l', ylim = c(-2, 5),
     ylab = expression(paste('log2(', italic(cac1), Delta, '/WT)')),
     xlab = '(bp)', xaxt = 'n', col = 'white')
polygon(px, py, col = alpha('grey', 0.3), border = NA)
lines(x, log2(sense_cac_60 / sense_wt_60))
lines(x, log2(as_cac_60 / as_wt_60), col = 'purple')
# Dashed line at minus the median promoter width. na.rm = TRUE keeps the line
# when some genes have no width; otherwise median() returns NA and nothing
# is drawn.
abline(v = -median(tandem_gene_A_sub$promoter_width, na.rm = TRUE), lty = 2)
axis(side = 1, at = c(-500, 0, 500), labels = c(-500, 'TSS', 500))
legend('topleft', c('Sense', 'Antisense'), col = c('black', 'purple'),
       cex = 0.9, lty = c(1, 1), bty = 'n')
dev.off()


